{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 测试王萌的算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "(150, 4)"
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "(150,)"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "shuffle_indexes = np.random.permutation(len(X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "array([149,  76, 124, 146,  84,  58,  92,  48,   2,  77,  12,  74, 112,\n       137, 143,  65,  47,  96, 110,  79,  75, 100,  39, 142, 125,  63,\n        24, 115,  43,  55, 121,  35,  38, 141, 128,   3, 116,  99, 136,\n       147, 135, 140,  49,  80,  87, 120,  66, 118,  26,  71, 145,  13,\n        21,   6,  72,  41,   5,  95, 111,  78,  91, 127, 119,  62,   9,\n       113,   7,  86,  31, 144, 114, 130,  10,  60,  40,  88,  56, 148,\n        19,  53, 138, 126, 103, 139,   0,  36,  73,  89,  29, 104,  15,\n        85,  83, 105,  46,   1, 108, 109,  51,  98,  11,  33, 133,  93,\n       131,  16,  45, 101,  81,   4, 132,  54,  14,  69,  23,  32, 102,\n        94,  97,  59, 129, 134,   8,  82,  68,  57,  52, 122, 117,  64,\n        18,  67,  27,  50, 107,  30,  28,  44,  70, 106, 123,  34,  20,\n        17,  37,  22,  90,  42,  61,  25])"
     },
     "metadata": {},
     "execution_count": 9
    }
   ],
   "source": [
    "shuffle_indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_ratio = 0.2\n",
    "test_size = int(len(X) * test_ratio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "30"
     },
     "metadata": {},
     "execution_count": 11
    }
   ],
   "source": [
    "test_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_indexes = shuffle_indexes[:test_size]\n",
    "train_indexes = shuffle_indexes[test_size:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = X[train_indexes]\n",
    "y_train = y[train_indexes]\n",
    "\n",
    "X_train = X[test_indexes]\n",
    "y_train = y[test_indexes]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "(30, 4)\n(30,)\n"
    }
   ],
   "source": [
    "print(X_train.shape)\n",
    "print(y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from playML.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "(120, 4)\n(120,)\n"
    }
   ],
   "source": [
    "print(X_train.shape)\n",
    "print(y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "(30, 4)\n(30,)\n"
    }
   ],
   "source": [
    "print(X_test.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from playML.kNN2 import kNNClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "my_knn_clf = kNNClassifier(k=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "KNN(k=3)"
     },
     "metadata": {},
     "execution_count": 21
    }
   ],
   "source": [
    "my_knn_clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predict = my_knn_clf.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "array([1, 2, 0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 2, 1, 0, 1, 1, 2, 2, 2, 1, 2,\n       2, 1, 1, 2, 0, 0, 0, 1])"
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "source": [
    "y_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "array([1, 2, 0, 0, 1, 2, 2, 1, 1, 0, 0, 2, 2, 1, 0, 1, 1, 2, 2, 2, 1, 2,\n       2, 1, 1, 2, 0, 0, 0, 1])"
     },
     "metadata": {},
     "execution_count": 25
    }
   ],
   "source": [
    "y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "29"
     },
     "metadata": {},
     "execution_count": 26
    }
   ],
   "source": [
    "sum(y_predict == y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "0.9666666666666667"
     },
     "metadata": {},
     "execution_count": 27
    }
   ],
   "source": [
    "sum(y_predict == y_test) / len(y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## sklearn中的train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state = 666)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "(120, 4)\n(120,)\n"
    }
   ],
   "source": [
    "print(X_train.shape)\n",
    "print(y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "array([1, 2, 1, 2, 0, 1, 1, 2, 1, 1, 1, 0, 0, 0, 2, 1, 0, 2, 2, 2, 1, 0,\n       2, 0, 1, 1, 0, 1, 2, 2])"
     },
     "metadata": {},
     "execution_count": 34
    }
   ],
   "source": [
    "y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}